Data for cfbscrapR comes from https://collegefootballdata.com/. I filter the data set to only include committed recruits.
Sys.setenv(CFBD_API_KEY = "Z5JDzujwt45dUw9h/GVgDo4aJOnvY6QUSNm0xGyKDPeRBfoltU1afsqnuOyEjw95")
seasons <- 2000:2020
recruitData <- data.frame()
for(i in seasons) {
season <- i
recruit <-cfbd_recruiting_player(
year = i,
recruit_type = "HighSchool",
)
recruit$hometown_latitude <- recruit$hometownInfo$latitude
recruit$hometown_longitude <- recruit$hometownInfo$longitude
recruit$hometown_fipscode <- recruit$hometownInfo$fipsCode
recruit <- recruit[ -c(17) ] #if there is a bug, check the column index.
recruitData <- rbind(recruit, recruitData)
}
## 2021-04-28 02:51:00: Scraping player recruiting data...
## 2021-04-28 02:51:00: Scraping player recruiting data...
## 2021-04-28 02:51:00: Scraping player recruiting data...
## 2021-04-28 02:51:01: Scraping player recruiting data...
## 2021-04-28 02:51:01: Scraping player recruiting data...
## 2021-04-28 02:51:01: Scraping player recruiting data...
## 2021-04-28 02:51:01: Scraping player recruiting data...
## 2021-04-28 02:51:02: Scraping player recruiting data...
## 2021-04-28 02:51:02: Scraping player recruiting data...
## 2021-04-28 02:51:02: Scraping player recruiting data...
## 2021-04-28 02:51:03: Scraping player recruiting data...
## 2021-04-28 02:51:04: Scraping player recruiting data...
## 2021-04-28 02:51:05: Scraping player recruiting data...
## 2021-04-28 02:51:05: Scraping player recruiting data...
## 2021-04-28 02:51:06: Scraping player recruiting data...
## 2021-04-28 02:51:06: Scraping player recruiting data...
## 2021-04-28 02:51:07: Scraping player recruiting data...
## 2021-04-28 02:51:07: Scraping player recruiting data...
## 2021-04-28 02:51:07: Scraping player recruiting data...
## 2021-04-28 02:51:08: Scraping player recruiting data...
## 2021-04-28 02:51:08: Scraping player recruiting data...
rm(recruit, season, seasons)
recruitData <- recruitData %>%
filter(!is.na(committed_to))
Data comes from PFR. Inspo from https://github.com/paulg66/NFL_Draft/blob/master/Draft%20Results%20Scrape.R
urlPrefix <- "https://www.pro-football-reference.com/years/"
urlEnd<- "/draft.htm"
startyear <- 2000
endyear <- 2020
draftData <- NULL
for (i in startyear:endyear){
year <- i
url <- paste(urlPrefix,as.character(year),urlEnd,sep = '') #Build URL with increased year
query <- getURL(url)
query <- readHTMLTable(query, stringsAsFactors = F) #pull data
tempDraftData <- query$drafts
tempDraftData$DraftYear <- year #add draft year
draftData <- rbind(draftData,tempDraftData) #rbind to main dataset
}
rm(tempDraftData)
draftData <- draftData[draftData$Rnd != "Rnd",] #remove title rows
draftData$Player <- str_remove(draftData$Player,"HOF") #remove HOFs
draftData$Player <- str_trim(draftData$Player,"right") #remove spaces at end of names
draftData$Pick <- sapply(draftData$Pick, as.numeric) #fix format
draftData$Rnd <- sapply(draftData$Rnd, as.numeric)
draftData$Age <- sapply(draftData$Age, as.numeric)
# Fix school names to match recruitData
draftData$`College/Univ` <- gsub("St.", "State", draftData$`College/Univ`, fixed=TRUE)
draftData$`College/Univ` <- gsub("Col.", "College", draftData$`College/Univ`, fixed=TRUE)
draftData$`College/Univ` <- gsub(" (FL)", "", draftData$`College/Univ`, fixed=TRUE)
draftData$`College/Univ` <- gsub("North Carolina State", "NC State", draftData$`College/Univ`, fixed=TRUE)
draftData$`College/Univ` <- gsub("Mississippi", "Ole Miss", draftData$`College/Univ`, fixed=TRUE)
draftData$`College/Univ` <- gsub("North Carolina State", "NC State", draftData$`College/Univ`, fixed=TRUE)
draftData$`College/Univ` <- gsub("Ole Miss State", "Mississippi State", draftData$`College/Univ`, fixed=TRUE)
draftData$`College/Univ` <- gsub("Hawaii", "Hawai'i", draftData$`College/Univ`, fixed=TRUE)
draftData$`College/Univ` <- gsub("Central Florida", "UCF", draftData$`College/Univ`, fixed=TRUE)
draftData$`College/Univ` <- gsub("Southern Miss", "Southern Mississippi", draftData$`College/Univ`, fixed=TRUE)
draftData$`College/Univ` <- gsub("Cal Poly-San Luis Obispo", "Cal Poly", draftData$`College/Univ`, fixed=TRUE)
draftData$`College/Univ` <- gsub("California-Davis", "UC Davis", draftData$`College/Univ`, fixed=TRUE)
draftData$`College/Univ` <- gsub("Massachusetts", "UMass", draftData$`College/Univ`, fixed=TRUE)
draftData <- draftData[ -c(29) ]
cols = names(draftData)
cols[1] = 'Rnd'
cols[2] = 'Pick'
cols[3] = 'Team'
cols[4] = 'Player'
cols[5] = 'Pos'
cols[6] = 'Age'
cols[7] = 'Last Year'
cols[8] = '1st Team All Pro'
cols[9] = 'Pro Bowl'
cols[10] = 'Years as Starter'
cols[11] = 'Career AV'
cols[12] = 'Draft Team AV'
cols[13] = 'Games Played'
cols[14] = 'Passes Completed'
cols[15] = 'Pass Attempts'
cols[16] = 'Passing Yds'
cols[17] = 'Passing TDs'
cols[18] = 'Interceptions Thrown'
cols[19] = 'Rushing Attepts'
cols[20] = 'Rushing Yds'
cols[21] = 'Rushing TDs'
cols[22] = 'Receptions'
cols[23] = 'Recieving Yds'
cols[24] = 'Recieving TDs'
cols[25] = 'Solo Tackles'
cols[26] = 'Interceptions (Def)'
cols[27] = 'Sacks (Def)'
cols[28] = 'College/Univ'
cols[29] = 'Draft Year'
names(draftData) = cols
teamInfo <- cfbd_team_info(conference = NULL, only_fbs = TRUE, year = NULL)
logos <- data.frame(matrix(unlist(teamInfo$logos), nrow=length(teamInfo$logos), byrow=TRUE),stringsAsFactors=FALSE)
teamInfo <- cbind(teamInfo, logos) %>%
rename(
logo1 = X1,
logo2 = X2
)
teamInfo <- teamInfo[ -c(12) ]
rm(logos)
draftSuccessTable <- draftData %>%
filter(`Draft Year` >= 2005) %>%
group_by(`College/Univ`) %>%
summarise(
avgPick = mean(Pick),
numPicks = n(),
avgAge = mean(Age, na.rm = TRUE)
) %>%
mutate(
draftRank = trunc(rank(-numPicks))
)
recruitSuccessTable <- recruitData %>%
filter(year >= 2005, ranking <= 224) %>%
mutate(
fiveStar = case_when(
stars == 5 ~ 1,
stars != 5 ~ 0
)) %>%
group_by(committed_to) %>%
summarise(
numRecruits = n(),
avgRating = mean(rating),
fiveStars = sum(fiveStar)
) %>%
mutate(
recruitRank = trunc(rank(-numRecruits))
)
recruitXdraft <- merge(x = recruitSuccessTable, y = draftSuccessTable, by.x = "committed_to", by.y = "College/Univ", all = TRUE) %>%
mutate(
glowUp = recruitRank - draftRank
) %>%
filter(numPicks >= 20)
recruitXdraft %>%
filter(recruitRank <= 5) %>%
select(committed_to, numRecruits, recruitRank, numPicks, draftRank) %>%
arrange(recruitRank) %>%
gt() %>%
cols_align(
align = "center",
columns = c(3,5)
) %>%
tab_header(
title = "Top 5 recruiting schools and their draft rankings",
subtitle = "Since 2005"
) %>%
tab_options(
table.border.top.color = "white",
row.striping.include_table_body = FALSE
) %>%
tab_source_note(
source_note = "SOURCE: @cfbfastR and @pfrer"
) %>%
cols_label(
committed_to = "SCHOOL",
numRecruits = "TOP 224 RECRUITS",
recruitRank = "RECRUITING RANK",
numPicks = "DRAFT PICKS",
draftRank = "DRAFT RANK",
) %>%
tab_style(
style = list(
cell_fill(color = "lightyellow")
),
locations = cells_body(
columns = vars(committed_to, numRecruits, recruitRank, numPicks, draftRank),
rows = committed_to == "Texas")
) %>%
tab_style(
style = list(
cell_text(weight = "bold")
),
locations = cells_body(
columns = vars(committed_to, numRecruits, recruitRank, numPicks, draftRank),
rows = committed_to == "Texas")
)
## Warning: The `.dots` argument of `group_by()` is deprecated as of dplyr 1.0.0.
Top 5 recruiting schools and their draft rankings | ||||
---|---|---|---|---|
Since 2005 | ||||
SCHOOL | TOP 224 RECRUITS | RECRUITING RANK | DRAFT PICKS | DRAFT RANK |
Alabama | 219 | 1 | 109 | 1 |
Texas | 183 | 2 | 58 | 17 |
Ohio State | 181 | 3 | 101 | 3 |
USC | 181 | 3 | 93 | 4 |
Georgia | 178 | 5 | 86 | 6 |
SOURCE: @cfbfastR and @pfrer |
topFive <- recruitXdraft %>%
filter(recruitRank <= 5)
draftData_graph <- draftData %>%
filter(`College/Univ` %in% topFive$committed_to) %>%
group_by(`College/Univ`, `Draft Year`) %>%
summarise(
numPicks = n()
)
## `summarise()` has grouped output by 'College/Univ'. You can override using the `.groups` argument.
schools <- unique(draftData_graph$`College/Univ`)
years <- unique(draftData$`Draft Year`)
seasonsHack <- data.frame(school = sort(rep(schools, length(years))),
year = rep(years, length(schools)))
draftData_graph1 <- merge(x = seasonsHack, y = draftData_graph, by.x = c("school","year"), by.y = c("College/Univ", "Draft Year"), all = TRUE)
draftData_graph1[is.na(draftData_graph1)] <- 0
draftData_graph1 %>%
left_join(teamInfo, by = c('school' = 'school')) %>%
ggplot(aes(x = year, y = numPicks, group = school)) +
geom_line(aes(color = color), size = 1) +
geom_point(aes(color = color), alpha=.7) +
scale_colour_identity() +
geom_hline(yintercept = (mean(topFive$numPicks)/16), color = "black", linetype = "dashed") +
xlim(2005, 2020) +
ylim(0, 15) +
theme_fivethirtyeight() +
theme(
legend.title = element_blank(),
strip.text = element_text(face = "bold"),
axis.title.y = element_text(),
axis.title.x = element_blank(),
axis.text.x = element_text(angle = 45, vjust = .7)
) +
labs(
y = "Number of Draft Picks",
title = "Draft picks by season",
subtitle = "Dotted line represents average draft picks for schools with top 5\nrecruiting from 2005 to 2020",
caption = "Data: @cfbfastR | Plot: @LauraStickells"
)
## Warning: Removed 25 row(s) containing missing values (geom_path).
## Warning: Removed 25 rows containing missing values (geom_point).
texasMapData1 <- recruitData %>%
left_join(teamInfo, by = c('committed_to' = 'school')) %>%
filter(committed_to %in% c("Texas"), ranking <= 224, year >= 2005)
seasonsHack <- data.frame(year = 2005:2020)
texasMapData1 <- merge(texasMapData1, seasonsHack, by="year", all.x=T, all.y=T)
texasMapData1$hometown_latitude[is.na(texasMapData1$hometown_latitude)] <- 33
texasMapData1$hometown_longitude[is.na(texasMapData1$hometown_longitude)] <- -96
texasMapData1$hometown_longitude <- sapply(texasMapData1$hometown_longitude, as.numeric)
texasMapData1$hometown_latitude <- sapply(texasMapData1$hometown_latitude, as.numeric)
texasMapData1Transformed <- usmap_transform(texasMapData1[ c(18, 17) ])
## Warning in showSRID(uprojargs, format = "PROJ", multiline = "NO", prefer_proj =
## prefer_proj): Discarded datum unknown in Proj4 definition
texasMapData1 <- texasMapData1 %>%
left_join(texasMapData1Transformed, by = c('hometown_longitude', 'hometown_latitude'))
asp_ratio <- 1.618
TexasRecruitsMap1 <- plot_usmap("states") +
geom_image(data = texasMapData1,
aes(x = hometown_longitude.1, y = hometown_latitude.1,
image = logo1),
size = 0.07, by = "width", asp = asp_ratio) +
theme_economist() +
theme(
panel.grid = element_blank(),
axis.title = element_blank(),
axis.text = element_blank(),
axis.ticks = element_blank(),
axis.line = element_blank()
) +
labs(
title = " Texas commits in 247 Sports' top 224 in {closest_state}",
subtitle = "",
caption = "Data: @cfbfastR | Plot: @LauraStickells"
) +
transition_states(
year,0,30
)
animate(TexasRecruitsMap1,
duration = 60,
fps = 2,
height = 6,
width = 6,
units = "in",
res = 300
)